* Reset the session: empty memory, turn off output paging, and close any log
* that is still open (capture ignores the error raised when no log is open).
clear all
set more off
cap log close

* Run the project's directory set-up script. The globals $dir_log and $dir_data
* used in this file are presumably defined there -- TODO confirm.
do "H:\Lavecchia_7086\to-transfer-jan-2022\RESTAT_Replication_Programs\0_Set_Directories.do"


* Open this script's log, overwriting any earlier copy. Because of the capture
* prefix, a failure to open the log is silent and the script keeps running.
cap log using "$dir_log\3c_Predicted.log", replace

* ---------------------------------------------------------------------------
* Person-level capital-gains file for 2000-2016.
* Output: one row per lin__i with
*   clkgxi0016               sum over all retained rows of clkgxi*cpi_to2016
*   clkgxi00/01/02           clkgxi*cpi_to2016 in 2000, 2001, 2002 (0 if none)
* ---------------------------------------------------------------------------
use "$dir_data\capital_income_final_0016.dta"
keep year lin__i clkgxi cpi_to2016 ggex tirc_i

* Remove rows whose ggex_ exceeds 100,000; rows with ggex_ missing are retained.
drop if ggex_ > 100000 & ggex_ != .

* Total over every remaining row of the person.
sort lin__i
gen temp = clkgxi*cpi_to2016
by lin__i: egen clkgxi0016 = total(temp)
drop temp
*drop if year>=2003

* Spread each of the 2000-2002 yearly values across all rows of the person.
forvalues y = 2000/2002 {
    * two-digit suffix used in the variable name (00, 01, 02)
    local yy = substr("`y'", 3, 2)
    gen temp = clkgxi*cpi_to2016 if year == `y'
    by lin__i: egen clkgxi`yy' = max(temp)
    drop temp
    * no row for that year -> zero
    replace clkgxi`yy' = 0 if clkgxi`yy' == .
}

* Keep the first row of each person. dup is the within-person row counter;
* its tabulation is written to the log.
quietly by lin__i: gen dup = _n
tab dup
drop if dup > 1
drop dup

keep lin__i clkgxi00 clkgxi01 clkgxi02 clkgxi0016
save "$dir_data\clkgxi_total_0016.dta", replace


* Assemble the data
* ---------------------------------------------------------------------------
* Assemble the 1982-1999 panel and attach the 2000-2016 person-level totals.
* ---------------------------------------------------------------------------
clear
use "$dir_data\data_8299_with_demographics.dta"

* Sample restriction: only people aged 20 to 90 in 1990 stay in the data
* (rows with age90 missing are removed as well).
keep if inrange(age90, 20, 90)

/* Disabled (note dated Feb 25, 2020: believed not to be needed)
sort lin__i year
merge lin__i year using "$dir_data\ggex_8299.dta"
keep if _merge!=2
drop _merge
*/

* Flag person-years reporting too large an exemption. ggex_ is divided by a
* period-specific rate (0.5 for 1984-87, 0.667 for 1988-89, 0.75 for 1990-94);
* outside 1984-1994 the scaled value is missing and no flag is set.
gen temp = ggex_/0.5 if inrange(year, 1984, 1987)
replace temp = ggex_/0.667 if inrange(year, 1988, 1989)
replace temp = ggex_/0.75 if inrange(year, 1990, 1994)
* flagged when above 100,000 in any year, or above 50,000 in 1986/1987
gen flag = 1 if temp != . & (temp > 100000 | (temp > 50000 & inlist(year, 1986, 1987)))
drop temp

* Person total of clkgxi*cpi_to2016 over 1982-1999, leaving out 1994 and any
* flagged person-year.
gen temp = clkgxi*cpi_to2016 if inrange(year, 1982, 1999) & year != 1994 & flag != 1
bysort lin__i: egen clkgxi8299 = total(temp)
drop temp
replace clkgxi8299 = 0 if clkgxi8299 == .

*drop if year>=1985 & year<=1999

* Bring in clkgxi00, clkgxi01, clkgxi02 and clkgxi0016 by person; rows that
* exist only in the using file are discarded.
sort lin__i
merge lin__i using "$dir_data\clkgxi_total_0016.dta"
drop if _merge == 2
drop _merge


* ---------------------------------------------------------------------------
* Per-year variables for 1982-1984, copied onto every row of the person:
*   clkgxiYY  = clkgxi*cpi_to2016 in year 19YY (0 when there is no value)
*   tirc_iYY  = tirc_i in year 19YY            (0 when there is no value)
* ---------------------------------------------------------------------------
sort lin__i
forvalues y = 1982/1984 {
    * two-digit suffix used in the variable names (82, 83, 84)
    local yy = substr("`y'", 3, 2)

    gen temp = clkgxi*cpi_to2016 if year == `y'
    by lin__i: egen clkgxi`yy' = max(temp)
    drop temp
    replace clkgxi`yy' = 0 if clkgxi`yy' == .

    gen temp = tirc_i if year == `y'
    by lin__i: egen tirc_i`yy' = max(temp)
    drop temp
    replace tirc_i`yy' = 0 if tirc_i`yy' == .
}

* People without a match in the 2000-2016 file have missing 2000-2002 values.
foreach v of varlist clkgxi00 clkgxi01 clkgxi02 {
    replace `v' = 0 if `v' == .
}

* up = 1 when the 2000-2002 total is strictly larger than the 1982-1984 total.
generate up = (clkgxi82 + clkgxi83 + clkgxi84 < clkgxi00 + clkgxi01 + clkgxi02)

/* NO LONGER NEEDED - added by Adam on February 25, 2020 
/* The following added by Adam on Feb 21, 2020 */
gen tirc_i2 = tirc_i^2
gen tirc_i3 = tirc_i^3
gen tirc_i4 = tirc_i^4

gen clkgxi_cpi = clkgxi*cpi_to2016

xi: reg clkgxi_cpi age age2 age3 age4 tirc_i tirc_i2 tirc_i3 tirc_i4 c.age#c.tirc_i c.age2#c.tirc_i c.age3#c.tirc_i c.age4#c.tirc_i c.age#c.tirc_i2 c.age2#c.tirc_i2 c.age3#c.tirc_i2 c.age4#c.tirc_i2 c.age#c.tirc_i3 c.age2#c.tirc_i3 c.age3#c.tirc_i3 c.age4#c.tirc_i3 c.age#c.tirc_i4 c.age2#c.tirc_i4 c.age3#c.tirc_i4 c.age4#c.tirc_i4 i.fcmp_i i.fsizei i.num_sxco_i i.tnkidi i.num_iemcop if year >= 1982 & year <= 1984

predict pred_t_clkgxi_cpi, xb
replace pred_t_clkgxi_cpi = 0 if pred_t_clkgxi_cpi < 0

bysort lin__i: egen pred_8299_clkgxi_cpi = total(pred_t_clkgxi_cpi)
gen diff1 = pred_8299_clkgxi_cpi - clkgxi8299

/* END of new stuff */
*/

* Capital gains in 1982-1984 (sum of the three yearly variables).
gen clkgxi8284 = clkgxi82 + clkgxi83 + clkgxi84

* ---------------------------------------------------------------------------
* Per-year variables for 1985-1999, copied onto every row of the person:
*   clkgxiYY  = clkgxi*cpi_to2016 in year 19YY (0 when there is no value)
*   tirc_iYY  = tirc_i in year 19YY            (0 when there is no value)
* ---------------------------------------------------------------------------
sort lin__i
forvalues y = 1985/1999 {
    * two-digit suffix used in the variable names (85 ... 99)
    local yy = substr("`y'", 3, 2)

    gen temp = clkgxi*cpi_to2016 if year == `y'
    by lin__i: egen clkgxi`yy' = max(temp)
    drop temp
    replace clkgxi`yy' = 0 if clkgxi`yy' == .

    gen temp = tirc_i if year == `y'
    by lin__i: egen tirc_i`yy' = max(temp)
    drop temp
    replace tirc_i`yy' = 0 if tirc_i`yy' == .
}


* ---------------------------------------------------------------------------
* Multi-year capital-gains aggregates, built from the yearly clkgxiYY
* variables and the 2000-2016 total clkgxi0016.
*
* FIX: clkgxi8593 previously added clkgxi93 twice, so 1993 gains were
* double-counted. 1993 is now counted once. This changes clkgxi8593 and
* everything computed from it later in this file (the clkgxi8593 regressor and
* both pred_lifetime_clkgxi_8216* variables) for anyone with 1993 gains.
*
* The sums are assembled from the macros below so that every year is typed
* exactly once. The yearly variables were zero-filled when created, so the
* "replace ... if == ." lines are defensive only.
* ---------------------------------------------------------------------------

* People without a match in the 2000-2016 file: treat the total as zero.
replace clkgxi0016 = 0 if clkgxi0016 == .

local cg8284 clkgxi82 + clkgxi83 + clkgxi84
local cg8589 clkgxi85 + clkgxi86 + clkgxi87 + clkgxi88 + clkgxi89
local cg9093 clkgxi90 + clkgxi91 + clkgxi92 + clkgxi93
local cg9499 clkgxi94 + clkgxi95 + clkgxi96 + clkgxi97 + clkgxi98 + clkgxi99

* Capital gains in 1985-2016
gen clkgxi8516 = `cg8589' + `cg9093' + `cg9499' + clkgxi0016
replace clkgxi8516 = 0 if clkgxi8516 == .

* Capital gains in 1985-1999
gen clkgxi8599 = `cg8589' + `cg9093' + `cg9499'
replace clkgxi8599 = 0 if clkgxi8599 == .

// August 5, 2020 change: Use reported/realized capital gains from 1985-1993 (not 1982-1993) to define T and C groups b/c earlier year realizations don't count towards the exemption
* Capital gains in 1985-1993 (each year counted once)
gen clkgxi8593 = `cg8589' + `cg9093'
replace clkgxi8593 = 0 if clkgxi8593 == .

// January 10, 2022 change: Use reported/realized capital gains from 1985-1989 to define T and C groups
* Capital gains in 1985-1989
gen clkgxi8589 = `cg8589'
replace clkgxi8589 = 0 if clkgxi8589 == .

// January 10, 2022 change: Use reported/realized capital gains from 1982-2016 to define T and C groups
* Capital gains in 1982-2016
gen clkgxi8216 = `cg8284' + `cg8589' + `cg9093' + `cg9499' + clkgxi0016
replace clkgxi8216 = 0 if clkgxi8216 == .

// January 10, 2022 change: Use reported/realized capital gains from 1982-1984 x 10 to predict lifetime capital gains to later define T & C groups
* Capital gains in 1982-1984, multiplied by 10
gen clkgxi8284_times10 = 10*(`cg8284')
replace clkgxi8284_times10 = 0 if clkgxi8284_times10 == .

* Capital gains in 1982-1993 (disabled)
*gen clkgxi8293 = clkgxi82 + clkgxi83 + clkgxi84 + clkgxi85 + clkgxi86 + clkgxi87 + clkgxi88 + clkgxi89 + clkgxi90 + clkgxi91 + clkgxi92 + clkgxi93
*replace clkgxi8293 = 0 if clkgxi8293 == .

* Capital gains in 1994-2016
gen clkgxi9416 = `cg9499' + clkgxi0016
replace clkgxi9416 = 0 if clkgxi9416 == .


* ---------------------------------------------------------------------------
* Reduce the panel to the first row of each person, then build regressors.
* ---------------------------------------------------------------------------
capture drop dup
* dup is the within-person row counter; its tabulation is written to the log.
quietly bysort lin__i: gen dup = _n
tab dup
drop if dup > 1
drop dup


* Indicators for a strictly positive yearly value:
*   dYY = 1 if clkgxiYY > 0,   tYY = 1 if tirc_iYY > 0
foreach yy in 82 83 84 91 92 93 94 {
    gen d`yy' = (clkgxi`yy' > 0)
    gen t`yy' = (tirc_i`yy' > 0)
}

* Powers 2-4 of age90.
forvalues p = 2/4 {
    gen age90_`p' = age90^`p'
}


* Powers 2-4 of the yearly tirc_i variables.
foreach yy in 82 83 84 91 92 93 94 {
    forvalues p = 2/4 {
        gen tirc_i`yy'_`p' = tirc_i`yy'^`p'
    }
}

* One fifth of the sum of tirc_i over 1995-1999, i.e. the five-year average.
by lin__i: gen expected_5yr_tirc = 0.2*(tirc_i95 + tirc_i96 + tirc_i97 + tirc_i98 + tirc_i99)

* ---------------------------------------------------------------------------
* Predict capital gains in 1994-2016 (clkgxi9416) from pre-1994 behaviour,
* absorbing age90. Negative predictions are set to zero.
* NOTE(review): predict, xb after areg leaves out the absorbed age90 effect
* (xbd would include it) -- confirm this is the intended prediction.
* ---------------------------------------------------------------------------

* Controls shared by both specifications (order kept as in the regressions).
local ctrl9193 d91 d92 d93 t91 t92 t93 tirc_i91 tirc_i91_2 tirc_i91_3 tirc_i91_4 tirc_i92 tirc_i92_2 tirc_i92_3 tirc_i92_4 tirc_i93 tirc_i93_2 tirc_i93_3 tirc_i93_4 i.fcmp_i i.fsizei i.num_sxco_i i.tnkidi i.num_iemcop expected_5yr_tirc

* Version 1: the 1991, 1992 and 1993 yearly gains enter separately.
areg clkgxi9416 clkgxi91 clkgxi92 clkgxi93 `ctrl9193', absorb(age90)
predict predicted9416, xb
replace predicted9416 = 0 if predicted9416 < 0
* lifetime measure = 1982-1984 gains + clkgxi8593 + predicted 1994-2016
gen pred_lifetime_clkgxi_8216 = clkgxi82 + clkgxi83 + clkgxi84 + clkgxi8593 + predicted9416


* Version 2: the aggregate clkgxi8593 replaces the three yearly gains.
areg clkgxi9416 clkgxi8593 `ctrl9193', absorb(age90)
predict predicted9416_v2, xb
replace predicted9416_v2 = 0 if predicted9416_v2 < 0
gen pred_lifetime_clkgxi_8216_v2 = clkgxi82 + clkgxi83 + clkgxi84 + clkgxi8593 + predicted9416_v2


* ---------------------------------------------------------------------------
* Predict later capital gains from 1982-1984 behaviour by OLS.
* For each outcome clkgxiSPAN the fitted value is stored in predictedSPAN and
* negative fitted values are set to zero.
* ---------------------------------------------------------------------------

* Regressor groups (concatenated in the same order as the regressions use).
local base8284  clkgxi82 clkgxi83 clkgxi84 up d82 d83 d84 t82 t83 t84 age82
local tirc_poly tirc_i82 tirc_i82_2 tirc_i82_3 tirc_i82_4 tirc_i83 tirc_i83_2 tirc_i83_3 tirc_i83_4 tirc_i84 tirc_i84_2 tirc_i84_3 tirc_i84_4
local tirc_lin  tirc_i82 tirc_i83 tirc_i84
local age_poly  age90 age90_2 age90_3 age90_4
local demog     i.fcmp_i i.fsizei i.num_sxco_i i.tnkidi i.num_iemcop

* 1985-2016 and 1985-1999: quartic in each year's tirc_i.
foreach span in 8516 8599 {
    reg clkgxi`span' `base8284' `tirc_poly' `age_poly' `demog'
    predict predicted`span', xb
    replace predicted`span' = 0 if predicted`span' < 0
}

* 1982-2016 and 1982-1999: tirc_i enters linearly only.
foreach span in 8216 8299 {
    reg clkgxi`span' `base8284' `tirc_lin' `age_poly' `demog'
    predict predicted`span', xb
    replace predicted`span' = 0 if predicted`span' < 0
}

* Prediction error (predicted minus actual) for each OLS specification.
foreach span in 8516 8599 8216 8299 {
    gen diff`span' = predicted`span' - clkgxi`span'
}
* Same for the two hybrid lifetime measures, against actual 1982-2016 gains.
gen hybrid_diff8216 = pred_lifetime_clkgxi_8216 - clkgxi8216
gen hybrid_diff8216_v2 = pred_lifetime_clkgxi_8216_v2 - clkgxi8216

* Actual, predicted and error triplets; reported first briefly, then in detail.
local report clkgxi8299 predicted8299 diff8299 clkgxi8216 predicted8216 diff8216 clkgxi8599 predicted8599 diff8599 clkgxi8516 predicted8516 diff8516 clkgxi8216 pred_lifetime_clkgxi_8216 hybrid_diff8216 clkgxi8216 pred_lifetime_clkgxi_8216_v2 hybrid_diff8216_v2

sum `report'

sum `report', d


* Keep the person identifier plus the actual and predicted capital-gains
* measures, and save the person-level file.
* (clkgxi8216 was listed twice in the original keep list; it is listed once here.)
keep lin__i predicted8216 clkgxi8299 predicted8299 up predicted8516 clkgxi8516 predicted8599 clkgxi8599 clkgxi8216 pred_lifetime_clkgxi_8216 pred_lifetime_clkgxi_8216_v2 clkgxi8593 clkgxi8589 clkgxi8284_times10 predicted9416 predicted9416_v2
* predicted8893 clkgxi8893

save "$dir_data\data_8593_predicted.dta", replace

* Close the log opened at the top of this script so it does not stay open
* after the script ends (capture: no error if no log is open).
cap log close






